% arXiv preprint. The arXiv identifier stays in the journal field so classic
% styles still print it, and is repeated in eprint/archiveprefix so that
% eprint-aware styles and tools can read it as structured data.
@article{tang2020human,
  author        = {Tang, Zongheng and Liao, Yue and Liu, Si and Li, Guanbin and Jin, Xiaojie and Jiang, Hongxu and Yu, Qian and Xu, Dong},
  title         = {Human-centric Spatio-Temporal Video Grounding With Visual Transformers},
  journal       = {arXiv preprint arXiv:2011.05049},
  year          = {2020},
  eprint        = {2011.05049},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/pdf/2011.05049.pdf},
}